{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "9b0aef6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# import modules\n",
    "import pandas as pd, json\n",
    "from elasticsearch import Elasticsearch\n",
    "from elasticsearch.helpers import bulk\n",
    "from getpass import getpass\n",
    "from urllib.request import urlopen\n",
    "from pprint import pprint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a5b8cdff",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ObjectApiResponse({'name': 'liuxgm.local', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'n1BjmRPcR2GObT6ZMbJ9xA', 'version': {'number': '8.11.0', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': 'd9ec3fa628c7b0ba3d25692e277ba26814820b20', 'build_date': '2023-11-04T10:04:57.184859352Z', 'build_snapshot': False, 'lucene_version': '9.8.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "elastic_user = \"elastic\"\n",
    "elastic_password = \"o6G_pvRL=8P*7on+o6XH\"\n",
    "elastic_endpoint = \"localhost\"\n",
    "url = f\"https://{elastic_user}:{elastic_password}@{elastic_endpoint}:9200\"\n",
    "es = Elasticsearch(url, ca_certs = \"./http_ca.crt\", verify_certs = True)\n",
    "\n",
    "es.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3e028252",
   "metadata": {},
   "source": [
    "# Ingest pipeline setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a1935b53",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'acknowledged': True}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/kw/pvmj1zgs44l8t32cs8blf6sc0000gn/T/ipykernel_73164/3688832360.py:44: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n",
      "  response = es.ingest.put_pipeline(id=pipeline_id, body=pipeline)\n"
     ]
    }
   ],
   "source": [
    "pipeline = {\n",
    "  \"processors\": [\n",
    "    {\n",
    "      \"inference\": {\n",
    "        \"field_map\": {\n",
    "          \"my_text\": \"text_field\"\n",
    "        },\n",
    "        \"model_id\": \"sentence-transformers__all-distilroberta-v1\",\n",
    "        \"target_field\": \"ml.inference.my_vector\",\n",
    "        \"on_failure\": [\n",
    "          {\n",
    "            \"append\": {\n",
    "              \"field\": \"_source._ingest.inference_errors\",\n",
    "              \"value\": [\n",
    "                {\n",
    "                  \"message\": \"Processor 'inference' in pipeline 'ml-inference-title-vector' failed with message '{{ _ingest.on_failure_message }}'\",\n",
    "                  \"pipeline\": \"ml-inference-title-vector\",\n",
    "                  \"timestamp\": \"{{{ _ingest.timestamp }}}\"\n",
    "                }\n",
    "              ]\n",
    "            }\n",
    "          }\n",
    "        ]\n",
    "      }\n",
    "    },\n",
    "    {\n",
    "      \"set\": {\n",
    "        \"field\": \"my_vector\",\n",
    "        \"if\": \"ctx?.ml?.inference != null && ctx.ml.inference['my_vector'] != null\",\n",
    "        \"copy_from\": \"ml.inference.my_vector.predicted_value\",\n",
    "        \"description\": \"Copy the predicted_value to 'my_vector'\"\n",
    "      }\n",
    "    },\n",
    "    {\n",
    "      \"remove\": {\n",
    "        \"field\": \"ml.inference.my_vector\",\n",
    "        \"ignore_missing\": True\n",
    "      }\n",
    "    }\n",
    "  ]\n",
    "}\n",
    "\n",
    "pipeline_id = 'vector_embedding_demo'\n",
    "response = es.ingest.put_pipeline(id=pipeline_id, body=pipeline)\n",
    "\n",
    "# Print the response\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "65932fef",
   "metadata": {},
   "source": [
    "# Index Mapping / Template setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e1a7a39f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'acknowledged': True}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/kw/pvmj1zgs44l8t32cs8blf6sc0000gn/T/ipykernel_73164/624545924.py:34: ElasticsearchWarning: Legacy index templates are deprecated in favor of composable templates.\n",
      "  response = es.indices.put_template(name=\"my_vector_index\",\n"
     ]
    }
   ],
   "source": [
    "index_patterns = [\n",
    "    \"my_vector_index-*\"\n",
    "  ]\n",
    "\n",
    "order = 1\n",
    "\n",
    "settings = {\n",
    "      \"number_of_shards\": 1,\n",
    "      \"number_of_replicas\": 1,\n",
    "      \"index.default_pipeline\": pipeline_id\n",
    "    }\n",
    "\n",
    "mappings = {\n",
    "      \"properties\": {\n",
    "        \"my_vector\": {\n",
    "          \"type\": \"dense_vector\",\n",
    "          \"dims\": 768,\n",
    "          \"index\": True,\n",
    "          \"similarity\": \"dot_product\"\n",
    "        },\n",
    "        \"my_text\": {\n",
    "          \"type\": \"text\"\n",
    "        }\n",
    "      },\n",
    "      \"_source\": {\n",
    "        \"excludes\": [\n",
    "          \"my_vector\"\n",
    "        ]\n",
    "      }\n",
    "    }\n",
    "\n",
    "\n",
    "# Create the index template\n",
    "response = es.indices.put_template(name=\"my_vector_index\",\n",
    "                                   index_patterns=index_patterns,\n",
    "                                   order=order,\n",
    "                                   settings=settings,\n",
    "                                   mappings=mappings\n",
    "                                   )\n",
    "\n",
    "\n",
    "# Print the response\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9bfc79e2",
   "metadata": {},
   "source": [
    "# Indexing Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "7181a4ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "index_name = 'my_vector_index-01'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "74a26720",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ObjectApiResponse({'_shards': {'total': 2, 'successful': 1, 'failed': 0}})"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = [\n",
    "    (\"Hey, careful, man, there's a beverage here!\", \"The Dude\"),\n",
    "    (\"I’m The Dude. So, that’s what you call me. You know, that or, uh, His Dudeness, or, uh, Duder, or El Duderino, if you’re not into the whole brevity thing\", \"The Dude\"),\n",
    "    (\"You don't go out looking for a job dressed like that? On a weekday?\", \"The Big Lebowski\"),\n",
    "    (\"What do you mean brought it bowling, Dude?\", \"Walter Sobchak\"),\n",
    "    (\"Donny was a good bowler, and a good man. He was one of us. He was a man who loved the outdoors... and bowling, and as a surfer he explored the beaches of Southern California, from La Jolla to Leo Carrillo and... up to... Pismo\", \"Walter Sobchak\")\n",
    "]\n",
    "\n",
    "actions = [\n",
    "    {\n",
    "        \"_op_type\": \"index\",\n",
    "        \"_index\": \"my_vector_index-01\",\n",
    "        \"_source\": {\n",
    "            \"my_text\": text,\n",
    "            \"my_metadata\": metadata\n",
    "        }\n",
    "    } for text, metadata in data\n",
    "]\n",
    "\n",
    "bulk(es, actions)\n",
    "\n",
    "# Refresh the index to make sure all data is searchable\n",
    "es.indices.refresh(index=\"my_vector_index-01\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7f9583f2",
   "metadata": {},
   "source": [
    "# Querying Data\n",
    "Approximate k-nearest neighbor (kNN)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "810f42ef",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'_id': '5In5FIwBCEhyMiaWbFjR',\n",
      "  '_index': 'my_vector_index-01',\n",
      "  '_score': 0.7817012,\n",
      "  '_source': {'ml': {'inference': {}},\n",
      "              'my_metadata': 'The Dude',\n",
      "              'my_text': \"Hey, careful, man, there's a beverage here!\"}}]\n"
     ]
    }
   ],
   "source": [
    "knn = {\n",
    "      \"field\": \"my_vector\",\n",
    "      \"k\": 1,\n",
    "      \"num_candidates\": 5,\n",
    "      \"query_vector_builder\": {\n",
    "        \"text_embedding\": {\n",
    "          \"model_id\": \"sentence-transformers__all-distilroberta-v1\",\n",
    "          \"model_text\": \"Watchout I have a drink\"\n",
    "        }\n",
    "      }\n",
    "    }\n",
    "\n",
    "response = es.search(\n",
    "    index=index_name,\n",
    "    knn=knn,\n",
    "    source=True)\n",
    "\n",
    "pprint(response['hits']['hits'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "270a81b6",
   "metadata": {},
   "source": [
    "## Hybrid Searching (kNN + BM25) with RRF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f1ef3786",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'_id': '54n5FIwBCEhyMiaWbFjR',\n",
      "  '_index': 'my_vector_index-01',\n",
      "  '_score': 1.8080788,\n",
      "  'fields': {'my_metadata': ['Walter Sobchak'],\n",
      "             'my_text': ['What do you mean brought it bowling, Dude?']}},\n",
      " {'_id': '6In5FIwBCEhyMiaWbFjR',\n",
      "  '_index': 'my_vector_index-01',\n",
      "  '_score': 1.235873,\n",
      "  'fields': {'my_metadata': ['Walter Sobchak'],\n",
      "             'my_text': ['Donny was a good bowler, and a good man. He was one '\n",
      "                         'of us. He was a man who loved the outdoors... and '\n",
      "                         'bowling, and as a surfer he explored the beaches of '\n",
      "                         'Southern California, from La Jolla to Leo Carrillo '\n",
      "                         'and... up to... Pismo']}}]\n"
     ]
    }
   ],
   "source": [
    "query = {\n",
    "    \"match\": {\n",
    "      \"my_text\": \"bowling\"\n",
    "    }\n",
    "  }\n",
    "\n",
    "knn ={\n",
    "      \"field\": \"my_vector\",\n",
    "      \"k\": 3,\n",
    "      \"num_candidates\": 5,\n",
    "      \"query_vector_builder\": {\n",
    "        \"text_embedding\": {\n",
    "          \"model_id\": \"sentence-transformers__all-distilroberta-v1\",\n",
    "          \"model_text\": \"He enjoyed the game\"\n",
    "        }\n",
    "      }\n",
    "    }\n",
    "\n",
    "rank: {\n",
    "        \"rrf\": {}\n",
    "    }\n",
    "\n",
    "fields = [\n",
    "    \"my_text\",\n",
    "    \"my_metadata\"\n",
    "  ]\n",
    "\n",
    "\n",
    "response = es.search(\n",
    "    index=index_name,\n",
    "    fields=fields,\n",
    "    knn=knn,\n",
    "    query=query,\n",
    "    size=2,\n",
    "    source=False\n",
    "    )\n",
    "\n",
    "pprint(response['hits']['hits'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ab7345cb",
   "metadata": {},
   "source": [
    "## Filtering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "19f25db8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'_id': '5In5FIwBCEhyMiaWbFjR',\n",
      "  '_index': 'my_vector_index-01',\n",
      "  '_score': 0.59285694,\n",
      "  'fields': {'my_metadata': ['The Dude'],\n",
      "             'my_text': [\"Hey, careful, man, there's a beverage here!\"]}}]\n"
     ]
    }
   ],
   "source": [
    "knn ={\n",
    "    \"field\": \"my_vector\",\n",
    "    \"k\": 1,\n",
    "    \"num_candidates\": 5,\n",
    "    \"query_vector_builder\": {\n",
    "      \"text_embedding\": {\n",
    "        \"model_id\": \"sentence-transformers__all-distilroberta-v1\",\n",
    "        \"model_text\": \"Did you bring the dog?\"\n",
    "      }\n",
    "    },\n",
    "    \"filter\": {\n",
    "      \"term\": {\n",
    "        \"my_metadata.keyword\": \"The Dude\"\n",
    "      }\n",
    "    }\n",
    "  }\n",
    "\n",
    "fields = [\n",
    "    \"my_text\",\n",
    "    \"my_metadata\"\n",
    "  ]\n",
    "\n",
    "response = es.search(\n",
    "    index=index_name,\n",
    "    fields=fields,\n",
    "    knn=knn,\n",
    "    source=False\n",
    "    )\n",
    "\n",
    "pprint(response['hits']['hits'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e5a7c341",
   "metadata": {},
   "source": [
    "# Aggregrations\n",
    "and Select fields returned"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "877ed7f7",
   "metadata": {},
   "outputs": [
    {
     "ename": "BadRequestError",
     "evalue": "BadRequestError(400, 'search_phase_execution_exception', 'Fielddata is disabled on [my_metadata] in [my_vector_index-01]. Text fields are not optimised for operations that require per-document field data like aggregations and sorting, so these operations are disabled by default. Please use a keyword field instead. Alternatively, set fielddata=true on [my_metadata] in order to load field data by uninverting the inverted index. Note that this can use significant memory.')",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mBadRequestError\u001b[0m                           Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[16], line 26\u001b[0m\n\u001b[1;32m     13\u001b[0m aggs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m     14\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmetadata\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\n\u001b[1;32m     15\u001b[0m       \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mterms\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     18\u001b[0m     }\n\u001b[1;32m     19\u001b[0m   }\n\u001b[1;32m     21\u001b[0m fields \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m     22\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmy_text\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     23\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmy_metadata\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     24\u001b[0m   ]\n\u001b[0;32m---> 26\u001b[0m response \u001b[38;5;241m=\u001b[39m es\u001b[38;5;241m.\u001b[39msearch(\n\u001b[1;32m     27\u001b[0m     index\u001b[38;5;241m=\u001b[39mindex_name,\n\u001b[1;32m     28\u001b[0m     fields\u001b[38;5;241m=\u001b[39mfields,\n\u001b[1;32m     29\u001b[0m     aggs\u001b[38;5;241m=\u001b[39maggs,\n\u001b[1;32m     30\u001b[0m     knn\u001b[38;5;241m=\u001b[39mknn,\n\u001b[1;32m     31\u001b[0m     source\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m     32\u001b[0m     )\n\u001b[1;32m     34\u001b[0m pprint(response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhits\u001b[39m\u001b[38;5;124m'\u001b[39m][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhits\u001b[39m\u001b[38;5;124m'\u001b[39m])\n",
      "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/elasticsearch/_sync/client/utils.py:414\u001b[0m, in \u001b[0;36m_rewrite_parameters.<locals>.wrapper.<locals>.wrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    411\u001b[0m         \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m    412\u001b[0m             \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m api(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/elasticsearch/_sync/client/__init__.py:3924\u001b[0m, in \u001b[0;36mElasticsearch.search\u001b[0;34m(self, index, aggregations, aggs, allow_no_indices, allow_partial_search_results, analyze_wildcard, analyzer, batched_reduce_size, ccs_minimize_roundtrips, collapse, default_operator, df, docvalue_fields, error_trace, expand_wildcards, explain, ext, fields, filter_path, from_, highlight, human, ignore_throttled, ignore_unavailable, indices_boost, knn, lenient, max_concurrent_shard_requests, min_compatible_shard_node, min_score, pit, post_filter, pre_filter_shard_size, preference, pretty, profile, q, query, rank, request_cache, rescore, rest_total_hits_as_int, routing, runtime_mappings, script_fields, scroll, search_after, search_type, seq_no_primary_term, size, slice, sort, source, source_excludes, source_includes, stats, stored_fields, suggest, suggest_field, suggest_mode, suggest_size, suggest_text, terminate_after, timeout, track_scores, track_total_hits, typed_keys, version)\u001b[0m\n\u001b[1;32m   3922\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m __body \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m   3923\u001b[0m     __headers[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontent-type\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapplication/json\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 3924\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mperform_request(  \u001b[38;5;66;03m# type: ignore[return-value]\u001b[39;00m\n\u001b[1;32m   3925\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPOST\u001b[39m\u001b[38;5;124m\"\u001b[39m, __path, params\u001b[38;5;241m=\u001b[39m__query, headers\u001b[38;5;241m=\u001b[39m__headers, body\u001b[38;5;241m=\u001b[39m__body\n\u001b[1;32m   3926\u001b[0m )\n",
      "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/elasticsearch/_sync/client/_base.py:320\u001b[0m, in \u001b[0;36mBaseClient.perform_request\u001b[0;34m(self, method, path, params, headers, body)\u001b[0m\n\u001b[1;32m    317\u001b[0m         \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mKeyError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m):\n\u001b[1;32m    318\u001b[0m             \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m--> 320\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m HTTP_EXCEPTIONS\u001b[38;5;241m.\u001b[39mget(meta\u001b[38;5;241m.\u001b[39mstatus, ApiError)(\n\u001b[1;32m    321\u001b[0m         message\u001b[38;5;241m=\u001b[39mmessage, meta\u001b[38;5;241m=\u001b[39mmeta, body\u001b[38;5;241m=\u001b[39mresp_body\n\u001b[1;32m    322\u001b[0m     )\n\u001b[1;32m    324\u001b[0m \u001b[38;5;66;03m# 'X-Elastic-Product: Elasticsearch' should be on every 2XX response.\u001b[39;00m\n\u001b[1;32m    325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_verified_elasticsearch:\n\u001b[1;32m    326\u001b[0m     \u001b[38;5;66;03m# If the header is set we mark the server as verified.\u001b[39;00m\n",
      "\u001b[0;31mBadRequestError\u001b[0m: BadRequestError(400, 'search_phase_execution_exception', 'Fielddata is disabled on [my_metadata] in [my_vector_index-01]. Text fields are not optimised for operations that require per-document field data like aggregations and sorting, so these operations are disabled by default. Please use a keyword field instead. Alternatively, set fielddata=true on [my_metadata] in order to load field data by uninverting the inverted index. Note that this can use significant memory.')"
     ]
    }
   ],
   "source": [
    "knn = {\n",
    "    \"field\": \"my_vector\",\n",
    "    \"k\": 2,\n",
    "    \"num_candidates\": 5,\n",
    "    \"query_vector_builder\": {\n",
    "      \"text_embedding\": {\n",
    "        \"model_id\": \"sentence-transformers__all-distilroberta-v1\",\n",
    "        \"model_text\": \"did you bring it?\"\n",
    "      }\n",
    "    }\n",
    "  }\n",
    "\n",
    "aggs = {\n",
    "    \"metadata\": {\n",
    "      \"terms\": {\n",
    "        \"field\": \"my_metadata\"\n",
    "      }\n",
    "    }\n",
    "  }\n",
    "\n",
    "fields = [\n",
    "    \"my_text\",\n",
    "    \"my_metadata\"\n",
    "  ]\n",
    "\n",
    "response = es.search(\n",
    "    index=index_name,\n",
    "    fields=fields,\n",
    "    aggs=aggs,\n",
    "    knn=knn,\n",
    "    source=False\n",
    "    )\n",
    "\n",
    "pprint(response['hits']['hits'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16d58c37",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python",
   "language": "python",
   "name": "py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
